set.seed(42)

# dependencies
library(tidyverse)
library(metafor)
library(knitr)
library(kableExtra)
library(psych)
library(ggExtra)
library(scales) 
#library(ggtext) # devtools::install_github("clauswilke/ggtext")


# function to round all numeric vars in a data frame
round_df <- function(df, n_digits = 3) {
  df %>% mutate_if(is.numeric, round, digits = n_digits)
}

probability_of_superiority_function <- function(x, y) {
  nx <- length(x)
  ny <- length(y)
  rx <- sum(rank(c(x, y))[1:nx])
  A = (rx / nx - (nx + 1) / 2) / ny
  return(A)
}

spearman_brown_correction <- function(r_pearson) {
  inversion <- ifelse(r_pearson < 0, -1, 1)
  val <- round(2*abs(r_pearson)/(1 + abs(r_pearson)), 2)
  return(val*inversion)
}

apa_p_value <- function(p){
  p_formatted <- ifelse(p >= 0.001, paste("=", round(p, 3)),
                        ifelse(p < 0.001, "< .001", NA))
  p_formatted <- gsub(pattern = "0.", replacement = ".", x = p_formatted, fixed = TRUE)
  p_formatted
}

add_heterogeneity_metrics_to_forest <- function(fit) {
  bquote(paste("RE Model (", tau^2, " = ", .(formatC(round(fit$tau2, 1))), 
               ", ", I^2, " = ", .(formatC(round(fit$I2, 1))),
               "%, ", H^2," = ", .(formatC(round(fit$H2, 1))), ")"))
}

# table format in output
options(knitr.table.format = "html") 

# create directory needed to save output
dir.create("models")
dir.create("plots")

Data

data_demographics <- 
  read_csv("../data/effect sizes for meta-analyses/data_demographics.csv")

data_D_scores_internal_consistency_oddeventrials <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_oddeventrials.csv")

data_D_scores_internal_consistency_firstsecondhalves <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_firstsecondhalves.csv") 

data_D_scores_internal_consistency_permuted_estimates <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_permuted_estimates.csv")

data_A_scores_internal_consistency_permuted_estimates <- 
  read_csv("../data/effect sizes for meta-analyses/data_A_scores_internal_consistency_permuted_estimates.csv")

data_D_scores_internal_consistency_permuted_estimates_by_block_order <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_permuted_estimates_by_block_order.csv") 

data_D_scores_test_retest_r <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_test_retest_r.csv") 

data_D_scores_test_retest_icc <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_test_retest_icc.csv") 

Demographics & sample sizes

data_demographics %>%
  summarize(mean_age = mean(age, na.rm = TRUE),
            sd_age = sd(age, na.rm = TRUE)) %>% 
  round_df(1) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
mean_age sd_age
20.1 4.4
data_demographics %>%
  count(gender) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
gender n
Female 250
Male 145
Other 1
NA 1216
data_D_scores_internal_consistency_permuted_estimates %>%
  summarize(total_ic_n = sum(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
total_ic_n
1464
data_D_scores_test_retest_icc %>%
  summarize(total_trt_n = sum(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
total_trt_n
354

Internal consistency

Three methods of calculating internal consistency for the Implicit Relational Assessment Procedure (IRAP): - Odd vs even trials (mostly commonly reported in literature) - First vs second half of task (more common for the most popular implicit measure, the IAT; would allow a like-for-like comparison) - Cronbach’s alpha via permutation (most robust)

The IRAP effect is most often quantified using the D1 scoring algorithm (Greenwald, Banaji & Nosek, 2003). i.e., trim all RTs > 10,000 ms, then D1 = (mean RT in the inconsistent blocks - mean RT in the consistent blocks)/SD of RTs in both block types. D1 scores are used throughout. Because of this, it wasn’t possible to use Sam Parson’s splithalf R package. A workflow for permutation-based alpha estimation using D scores is provided below.

In each case, correlations between D scores calculated from the split halves are calculated, and then the Spearman-Brown prophecy formula is applied, i.e., r_sb = 2*r_pearson / (1+r_pearson). Importantly, this correction is applied to the absolute value of the pearson’s correlation, and then its sign is corrected to be congruent with the pearson’s correlation. This ensures that r_sb has a possible range of -1 to +1, as correlations should (whereas correction of native negative correlations allow for a lower limit of -Inf). I haven’t seen this treatment of SB corrections applied to negative correlations well explicated elsewhere, so it is important to note here.

For meta analysis, spearman brown correlations are converted to Fischers r-to-z for meta analysis. Estimates of the meta effect are then converted back for reporting. Heterogeneity metrics represent heterogeneity in Fischer’s r-to-z estimates. See here.

Split-half reliability via odd vs. even trials

Bonett transformations

# fit random Effects model 
fit_oddeven <-  data_D_scores_internal_consistency_oddeventrials %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_oddeven <-
  predict(fit_oddeven, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_oddeven,
                xlab = "Spearman-Brown correlation",
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_oddeven))
text(-1.4, 37, "Study", pos = 4)
text(1.6, 37, "Spearman-Brown correlation [95% CI]", pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_oddeven$k, 
         ", r_sb = ",  round(transf.iabt(predictions_oddeven$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_oddeven$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_oddeven$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_oddeven$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_oddeven$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_oddeven$k - 1, ") = ", round(fit_oddeven$QE, 2), 
         ", p ", ifelse(fit_oddeven$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_oddeven$QEp, 4)))),
         ", tau^2 = ", round(fit_oddeven$tau2, 2), 
         ", I^2 = ",   round(fit_oddeven$I2, 2),
         "%, H^2 = ",   round(fit_oddeven$H2, 2))

Meta effect: Meta analysis: k = 35, r_sb = 0.54, 95% CI [0.49, 0.59], 95% CR [0.49, 0.59].

Heterogeneity: Heterogeneity tests: Q(df = 34) = 43.41, p = 0.1293, tau^2 = 0, I^2 = 0.11%, H^2 = 1.

Split-half reliability via first vs. second half of the task

Bonett transformations

# fit random Effects model 
fit_firstsecond <- data_D_scores_internal_consistency_firstsecondhalves %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_firstsecond <-
  predict(fit_firstsecond, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_firstsecond,
                xlab = "Spearman-Brown correlation",
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_firstsecond))
text(-1.4, 37, "Study", pos = 4)
text(1.6, 37, "Spearman-Brown correlation [95% CI]", pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_firstsecond$k, 
         ", r_sb = ",  round(transf.iabt(predictions_firstsecond$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_firstsecond$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_firstsecond$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_firstsecond$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_firstsecond$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_firstsecond$k - 1, ") = ", round(fit_firstsecond$QE, 2), 
         ", p ", ifelse(fit_firstsecond$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_firstsecond$QEp, 4)))),
         ", tau^2 = ", round(fit_firstsecond$tau2, 2), 
         ", I^2 = ",   round(fit_firstsecond$I2, 2),
         "%, H^2 = ",   round(fit_firstsecond$H2, 2))

Meta effect: Meta analysis: k = 35, r_sb = 0.52, 95% CI [0.47, 0.57], 95% CR [0.47, 0.57].

Heterogeneity: Heterogeneity tests: Q(df = 34) = 34.87, p = 0.4264, tau^2 = 0, I^2 = 0%, H^2 = 1.

Permutation-based split half correlations

ie an estimate of alpha through large number number of random half splits (see Parsons, Kruijt, & Fox. 2019). Estimate of alpha obtained via mean of the resampled Spearman-Brown corrected split half correlations, when calculating D1 scores for each half and sampling 10000 permutations.

Plot native untransformed data.

ggplot(data_D_scores_internal_consistency_permuted_estimates, 
       aes(y = alpha, x = domain)) +
  geom_linerange(aes(ymin = alpha_ci_lower, ymax = alpha_ci_upper)) +
  geom_point() +
  coord_flip() +
  xlab("Domain") +
  ylab("Alpha") 

Bonett transformations

# fit random Effects model 
fit_internal_consistency_permuted_estimates <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_permutations <-
  predict(fit_internal_consistency_permuted_estimates, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_internal_consistency_permuted_estimates,
                xlab = bquote(paste("Cronbach's ", alpha)),
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_internal_consistency_permuted_estimates))
text(-1.4, 37, "Study", pos = 4)
text(1.6, 37, bquote(paste("Cronbach's ", alpha, " [95% CI]")), pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_internal_consistency_permuted_estimates$k, 
         ", alpha = ",  round(transf.iabt(predictions_permutations$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_permutations$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_permutations$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_internal_consistency_permuted_estimates$k - 1, ") = ", round(fit_internal_consistency_permuted_estimates$QE, 2), 
         ", p ", ifelse(fit_internal_consistency_permuted_estimates$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_internal_consistency_permuted_estimates$QEp, 4)))),
         ", tau^2 = ", round(fit_internal_consistency_permuted_estimates$tau2, 2), 
         ", I^2 = ",   round(fit_internal_consistency_permuted_estimates$I2, 2),
         "%, H^2 = ",   round(fit_internal_consistency_permuted_estimates$H2, 2))

# save to disk for making pdf plot
write_rds(fit_internal_consistency_permuted_estimates, "models/fit_internal_consistency_permuted_estimates.rds")

Meta effect: Meta analysis: k = 35, alpha = 0.54, 95% CI [0.48, 0.59], 95% CR [0.42, 0.62].

Heterogeneity: Heterogeneity tests: Q(df = 34) = 44.82, p = 0.1014, tau^2 = 0.01, I^2 = 7.87%, H^2 = 1.09.

GOSH

Sexuality IRAP seems to be an outlier

if(!file.exists("models/fit_gosh_ic.rds")){
  
  fit_gosh_ic <- gosh(fit_internal_consistency_permuted_estimates, 
                      subsets = 10000,
                      parallel = "multicore",  # change to "snow" on windows
                      ncpus = 4)
  
  write_rds(fit_gosh_ic, "models/fit_gosh_ic.rds")
  
} else {
  
  fit_gosh_ic <- read_rds("models/fit_gosh_ic.rds")
  
}

data_for_plotting_ic <- 
  bind_cols(as.tibble(fit_gosh_ic$incl) %>%
              # the study to be separated is here listed by order (counting down from top of forest plot)
              select(include1 = "23",
                     include2 = "24"),  
            as.tibble(fit_gosh_ic$res)) %>%
  mutate(IRAP = ifelse(include1 | include2, "Sexuality", "All other domains"),
         alpha = transf.iabt(estimate)) %>%
  # randomize rows to improve plotting
  sample_frac(size = 1)

p1 <- 
  ggplot(data_for_plotting_ic, aes(alpha, I2, color = IRAP)) +
  geom_point(alpha = 0.5) +
  scale_color_viridis_d(begin = 0.35, end = 0.65, direction = -1) +
  theme_classic() + 
  xlab(expression("Internal consistency (" * alpha * ")")) +
  ylab(expression("Heterogeneity (" * italic("I")^"2" * ")")) +
  theme(legend.position = c(0.2, 0.85), 
        legend.background = element_rect(fill = scales::alpha("white", 0)))

plot_gosh_ic <- 
  ggMarginal(p1, 
             type = "densigram",
             size = 4,
             margins = "both",
             groupFill = TRUE)

plot_gosh_ic

write_rds(plot_gosh_ic, "models/plot_gosh_ic.rds")

data_for_plotting_ic %>%
  group_by(IRAP) %>%
  summarize(alpha_median = median(alpha, na.rm = TRUE),
            alpha_ci_lower = quantile(alpha, 0.025, na.rm = TRUE),
            alpha_ci_upper = quantile(alpha, 0.975, na.rm = TRUE),
            I2_median = median(I2, na.rm = TRUE),
            I2_ci_lower = quantile(I2, 0.025, na.rm = TRUE),
            I2_ci_upper = quantile(I2, 0.975, na.rm = TRUE),
            H2_median = median(H2, na.rm = TRUE),
            H2_ci_lower = quantile(H2, 0.025, na.rm = TRUE),
            H2_ci_upper = quantile(H2, 0.975, na.rm = TRUE)) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
IRAP alpha_median alpha_ci_lower alpha_ci_upper I2_median I2_ci_lower I2_ci_upper H2_median H2_ci_lower H2_ci_upper
All other domains 0.51 0.47 0.56 0.00 0 9.79 1.00 1 1.11
Sexuality 0.55 0.50 0.62 24.68 0 61.28 1.33 1 2.58

Sensitivity analysis

excluding Sexuality IRAPs as outliers

# fit random Effects model 
fit_internal_consistency_permuted_estimates_sensitivity <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality")) %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# make predictions 
predictions_permutations_sensitivity <-
  predict(fit_internal_consistency_permuted_estimates_sensitivity, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# plot
metafor::forest(fit_internal_consistency_permuted_estimates_sensitivity,
                xlab = bquote(paste("Cronbach's ", alpha)),
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_internal_consistency_permuted_estimates_sensitivity))
text(-1.4, 35, "Study", pos = 4)
text(1.6, 35, bquote(paste("Cronbach's ", alpha, " [95% CI]")), pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_internal_consistency_permuted_estimates_sensitivity$k, 
         ", alpha = ",  round(transf.iabt(predictions_permutations_sensitivity$pred), 2), 
         ", 95% CI [", round(transf.iabt(predictions_permutations_sensitivity$ci.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations_sensitivity$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.iabt(predictions_permutations_sensitivity$cr.lb), 2), ", ", 
         round(transf.iabt(predictions_permutations_sensitivity$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_internal_consistency_permuted_estimates_sensitivity$k - 1, ") = ", round(fit_internal_consistency_permuted_estimates_sensitivity$QE, 2), 
         ", p ", ifelse(fit_internal_consistency_permuted_estimates_sensitivity$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_internal_consistency_permuted_estimates_sensitivity$QEp, 4)))),
         ", tau^2 = ", round(fit_internal_consistency_permuted_estimates_sensitivity$tau2, 2), 
         ", I^2 = ",   round(fit_internal_consistency_permuted_estimates_sensitivity$I2, 2),
         "%, H^2 = ",   round(fit_internal_consistency_permuted_estimates_sensitivity$H2, 2))

# save to disk for making pdf plot
write_rds(fit_internal_consistency_permuted_estimates_sensitivity, "models/fit_internal_consistency_permuted_estimates_sensitivity.rds")

Meta effect: Meta analysis: k = 33, alpha = 0.51, 95% CI [0.46, 0.56], 95% CR [0.46, 0.56].

Heterogeneity: Heterogeneity tests: Q(df = 32) = 21.59, p = 0.9179, tau^2 = 0, I^2 = 0%, H^2 = 1.

GOSH

if(!file.exists("models/fit_gosh_ic_sensitivity.rds")){
  
  fit_gosh_ic_sensitivity <- gosh(fit_internal_consistency_permuted_estimates_sensitivity, 
                                  subsets = 10000,
                                  parallel = "multicore",  # change to "snow" on windows
                                  ncpus = 4)
  
  write_rds(fit_gosh_ic_sensitivity, "models/fit_gosh_ic_sensitivity.rds")
  
} else {
  
  fit_gosh_ic_sensitivity <- read_rds("models/fit_gosh_ic_sensitivity.rds")
  
}

data_for_plotting_ic_sensitivity <- 
  as.tibble(fit_gosh_ic_sensitivity$res) %>%
  mutate(Type = "Other IRAPs",
         alpha = transf.iabt(estimate)) %>%
  # randomize rows to improve plotting
  sample_frac(size = 1)

p3 <- 
  ggplot(data_for_plotting_ic_sensitivity, aes(alpha, I2, color = Type)) +
  geom_point(alpha = 0.5) +
  scale_color_viridis_d(begin = 0.35, end = 0.65, direction = -1) +
  theme_classic() + 
  xlab(expression("Internal consistency (" * alpha * ")")) +
  ylab(expression("Heterogeneity (" * italic("I")^"2" * ")")) +
  theme(legend.position = c(0.2, 0.85)) +
  theme(legend.position = "none") 

plot_gosh_ic_sensitivity <- 
  ggMarginal(p3, 
             type = "densigram",
             size = 4,
             margins = "both",
             groupFill = TRUE)

plot_gosh_ic_sensitivity

write_rds(plot_gosh_ic_sensitivity, "models/plot_gosh_ic_sensitivity.rds")

data_for_plotting_ic_sensitivity %>%
  summarize(alpha_median = median(alpha, na.rm = TRUE),
            alpha_ci_lower = quantile(alpha, 0.025, na.rm = TRUE),
            alpha_ci_upper = quantile(alpha, 0.975, na.rm = TRUE),
            I2_median = median(I2, na.rm = TRUE),
            I2_ci_lower = quantile(I2, 0.025, na.rm = TRUE),
            I2_ci_upper = quantile(I2, 0.975, na.rm = TRUE),
            H2_median = median(H2, na.rm = TRUE),
            H2_ci_lower = quantile(H2, 0.025, na.rm = TRUE),
            H2_ci_upper = quantile(H2, 0.975, na.rm = TRUE)) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
alpha_median alpha_ci_lower alpha_ci_upper I2_median I2_ci_lower I2_ci_upper H2_median H2_ci_lower H2_ci_upper
0.51 0.47 0.56 0 0 7.74 1 1 1.08

Test-retest reliability

Pearson’s r correlations

with r-to-z transformations.

fit_test_retest_r <- data_D_scores_test_retest_r %>%
  mutate(domain = ifelse(domain == "Sexuality (1)", "Sexuality (1) 7-day followup", domain)) %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain) 

# make predictions 
predictions_test_retest_r <-
  predict(fit_test_retest_r, digits = 5) %>%
  as.data.frame() %>%
  round_df(2) 

# plot
metafor::forest(fit_test_retest_r,
                xlab = "Pearson's r correlations",
                transf = transf.ztor,
                addcred = TRUE,
                refline = FALSE,
                xlim = c(-2.5, 2),
                at = c(-1, -0.5, 0, 0.5, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_test_retest_r))
text(-2.5, 10, "Study", pos = 4)
text(2, 10, "r [95% CI]", pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_test_retest_r$k, 
         ", r = ",  round(transf.ztor(predictions_test_retest_r$pred), 2), 
         ", 95% CI [", round(transf.ztor(predictions_test_retest_r$ci.lb), 2), ", ", 
         round(transf.ztor(predictions_test_retest_r$ci.ub), 2), "]", 
         ", 95% CR [", round(transf.ztor(predictions_test_retest_r$cr.lb), 2), ", ", 
         round(transf.ztor(predictions_test_retest_r$cr.ub), 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_test_retest_r$k - 1, ") = ", round(fit_test_retest_r$QE, 2), 
         ", p ", ifelse(fit_test_retest_r$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_test_retest_r$QEp, 4)))),
         ", tau^2 = ", round(fit_test_retest_r$tau2, 2), 
         ", I^2 = ",   round(fit_test_retest_r$I2, 2),
         "%, H^2 = ",   round(fit_test_retest_r$H2, 2))

# write to disk for pdf plots
write_rds(fit_test_retest_r, "models/fit_test_retest_r.rds")

Meta effect: Meta analysis: k = 8, r = 0.14, 95% CI [-0.07, 0.35], 95% CR [-0.39, 0.6].

Heterogeneity: Heterogeneity tests: Q(df = 7) = 24.58, p = 9e-04, tau^2 = 0.07, I^2 = 72.78%, H^2 = 3.67.

Intraclass Correlations

“It has been argued that test-retest reliability should reflect absolute agreement, rather than consistency, between measurements (Koo & Li, 2016). For example, a perfect correlation between scores at two time points may occur also when there is a systematic difference between time points (i.e. a difference that is about equal for all participants).” (Parsons, Kruijt, & Fox, 2019). Absolute agreement therefore also takes within-participant changes into account in its denominator, where consistency does not.

# fit random Effects model 
fit_test_retest_icc <- data_D_scores_test_retest_icc %>%
  mutate(domain = ifelse(domain == "Sexuality (1)", "Sexuality (1) 7-day followup", domain)) %>%
  rma(yi   = ICC, 
      sei  = se, 
      data = .,
      slab = domain)

# make predictions 
predictions_test_retest_icc <-
  predict(fit_test_retest_icc, digits = 5) %>%
  as.data.frame() %>%
  round_df(2) 

# plot
metafor::forest(fit_test_retest_icc,
                xlab = "Absolute Agreement ICC",
                addcred = TRUE,
                refline = FALSE,
                xlim = c(-1.5, 1.7),
                at = c(-0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_test_retest_icc))
text(-1.5, 10, "Study", pos = 4)
text(1.7, 10, "ICC [95% CI]", pos = 2)

# summarize results
meta_effect <- 
  paste0("Meta analysis: k = ", fit_test_retest_icc$k, 
         ", ICC2 = ",  round(predictions_test_retest_icc$pred, 2), 
         ", 95% CI [", round(predictions_test_retest_icc$ci.lb, 2), ", ", 
         round(predictions_test_retest_icc$ci.ub, 2), "]", 
         ", 95% CR [", round(predictions_test_retest_icc$cr.lb, 2), ", ", 
         round(predictions_test_retest_icc$cr.ub, 2), "]") 

meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_test_retest_icc$k - 1, ") = ", round(fit_test_retest_icc$QE, 2), 
         ", p ", ifelse(fit_test_retest_icc$QEp < 0.0001, "< .0001", paste0("= ", as.character(round(fit_test_retest_icc$QEp, 4)))),
         ", tau^2 = ", round(fit_test_retest_icc$tau2, 2), 
         ", I^2 = ",   round(fit_test_retest_icc$I2, 2),
         "%, H^2 = ",   round(fit_test_retest_icc$H2, 2))

# write to disk for pdf plots
write_rds(fit_test_retest_icc, "models/fit_test_retest_icc.rds")

Meta effect: Meta analysis: k = 8, ICC2 = 0.2, 95% CI [0.05, 0.34], 95% CR [-0.15, 0.54].

Heterogeneity: Heterogeneity tests: Q(df = 7) = 21.44, p = 0.0032, tau^2 = 0.03, I^2 = 64.75%, H^2 = 2.84.

GOSH

if(!file.exists("models/fit_gosh_trt.rds")){
  
  fit_gosh_trt <- gosh(fit_test_retest_icc, 
                       parallel = "multicore", # change to "snow" on windows
                       ncpus = 4)
  
  write_rds(fit_gosh_trt, "models/fit_gosh_trt.rds")
  
} else {
  
  fit_gosh_trt <- read_rds("models/fit_gosh_trt.rds")
  
}

data_for_plotting_trt <- 
  as.tibble(fit_gosh_trt$res) %>%
  mutate(Type = "Other IRAPs",
         ICC = estimate) %>%
  # randomize rows to improve plotting
  sample_frac(size = 1)

p2 <- 
  ggplot(data_for_plotting_trt, aes(ICC, I2, color = Type)) +
  geom_point(alpha = 0.8) +
  scale_color_viridis_d(begin = 0.35, end = 0.65, direction = -1) +
  theme_classic() + 
  xlab("Test-retest reliability (ICC)") +
  ylab(expression("Heterogeneity (" * italic("I")^"2" * ")")) +
  theme(legend.position = "none") 

plot_gosh_trt <- 
  ggMarginal(p2, 
             type = "densigram",
             size = 4,
             margins = "both",
             groupFill = TRUE)

plot_gosh_trt

write_rds(plot_gosh_trt, "models/plot_gosh_trt.rds")
  
data_for_plotting_trt %>%
  summarize(ICC_median = median(ICC, na.rm = TRUE),
            ICC_ci_lower = quantile(ICC, 0.025, na.rm = TRUE),
            ICC_ci_upper = quantile(ICC, 0.975, na.rm = TRUE),
            I2_median = median(I2, na.rm = TRUE),
            I2_ci_lower = quantile(I2, 0.025, na.rm = TRUE),
            I2_ci_upper = quantile(I2, 0.975, na.rm = TRUE),
            H2_median = median(H2, na.rm = TRUE),
            H2_ci_lower = quantile(H2, 0.025, na.rm = TRUE),
            H2_ci_upper = quantile(H2, 0.975, na.rm = TRUE)) %>%
  round_df(2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
ICC_median ICC_ci_lower ICC_ci_upper I2_median I2_ci_lower I2_ci_upper H2_median H2_ci_lower H2_ci_upper
0.19 0 0.38 61.57 0 83.02 2.6 1 5.89

Power implications

Maximum observed correlation of two measures is a function of their true correlation and the reliability (i.e., self-correlation) of each measure.

\(r_{xy}=\frac{\rho_{xy}}{\sqrt{\rho_{xx}\rho_{yy}}}\)

Internal Consistency

observed_r <- function(true_r, reliability_a, reliability_b) {
  round(true_r * sqrt(reliability_a*reliability_b), 2)
}

max_cor <- observed_r(true_r = 1.0, 
                      reliability_a = transf.iabt(predictions_permutations_sensitivity$pred), 
                      reliability_b = 1.0)

medium_cor <- observed_r(true_r = 0.5, 
                         reliability_a = transf.iabt(predictions_permutations_sensitivity$pred), 
                         reliability_b = 1.0)

Given this meta estimate of reliability, assuming that the internal consistency of an external variable is perfect (1.0), the maximum correlation between the IRAP and that external variable (i.e., when true correlation is 1.0) is 0.72. For a true correlation of medium size (r = .5), the observed correlation would be 0.36.

Test-retest

max_cor <- observed_r(true_r = 1.0, 
                      reliability_a = predictions_test_retest_icc$pred, 
                      reliability_b = 1.0)

medium_cor <- observed_r(true_r = 0.5, 
                         reliability_a = predictions_test_retest_icc$pred, 
                         reliability_b = 1.0)

Given this meta estimate of reliability, assuming that the internal consistency of an external variable is perfect (1.0), the maximum correlation between the IRAP and that external variable (i.e., when true correlation is 1.0) is 0.45. For a true correlation of medium size (r = .5), the observed correlation would be 0.22.

Potential ways to improve reliability

Lengthening the task

Although there was some variation in test lengths, this was in a very small range of domains that also appeared to be particularly stable ones. It’s not meaningful to do moderation meta analysis on the existing data.

Internal Consistency

The Spearman-Brown prediction formula can be rearranged to make prediction about how the length of the test would need to change to meet a goal reliability:

\(\rho_{xx'}^*=\frac {n\rho_{xx'}}{1+(n-1)\rho_{xx'}}\)

where \(\rho_{xx'}^*\) refers to the goal reliability, \(\rho_{xx'}\) refers to the current reliability, and \(n\) refers to the test length multiplier.

current_ic <- transf.iabt(predictions_permutations_sensitivity$pred)

goal_ic <- 0.70
ic_length_increase_.70 <- round((goal_ic*(1 - current_ic)) / (current_ic*(1 - goal_ic)), 1)

goal_ic <- 0.80
ic_length_increase_.80 <- round((goal_ic*(1 - current_ic)) / (current_ic*(1 - goal_ic)), 1)

goal_ic <- 0.90
ic_length_increase_.90 <- round((goal_ic*(1 - current_ic)) / (current_ic*(1 - goal_ic)), 1)

Using \(a\) = 0.51, the IRAP would have to be 2.2 times longer for it to have an internal consistency of >=.70, 3.8 times longer for it to have an internal consistency of >=.80, or 8.5 times longer for it to have an internal consistency of >=.90.

Test-retest

The Spearman-Brown prediction formula can be rearranged to make prediction about how the length of the test would need to change to meet a goal reliability:

\({\rho }_{xx'}^{*}={\frac {n{\rho }_{xx'}}{1+(n-1){\rho }_{xx'}}}\)

where \({\rho }_{xx'}^{*}\) refers to the goal reliability, \({\rho }_{xx'}\) refers to the current reliability, and \({n}\) refers to the test length multiplier.

current_test_retest <- predictions_test_retest_icc$pred

goal_test_retest <- 0.90
trt_length_increase_.90 <- round((goal_test_retest*(1 - current_test_retest)) / (current_test_retest*(1 - goal_test_retest)), 1)

goal_test_retest <- 0.80
trt_length_increase_.80 <- round((goal_test_retest*(1 - current_test_retest)) / (current_test_retest*(1 - goal_test_retest)), 1)

goal_test_retest <- 0.70
trt_length_increase_.70 <- round((goal_test_retest*(1 - current_test_retest)) / (current_test_retest*(1 - goal_test_retest)), 1)

Using the absolute agreement estimate of 0.2, the IRAP would have to be 9.3 times longer for it to have a test-retest of >=.70, 16 times longer for it to have a test-retest of >=.80, or 36 times longer for it to have a test-retest of >=.90.

Alternative scoring algorithm: A scores

Internal consistency using permutation approach sensitivity analysis, comparing D scoring and A scoring. Multilevel moderator meta analyses, acknowledging non-independence between scores produced for each domain, and moderator by scoring type.

fit_internal_consistency_D_vs_A_scores <- 
  bind_rows(data_A_scores_internal_consistency_permuted_estimates %>%
              filter(!str_detect(domain, "Sexuality")) %>%
              mutate(scoring = "A"),
            data_D_scores_internal_consistency_permuted_estimates %>%
              filter(!str_detect(domain, "Sexuality")) %>%
              mutate(scoring = "D")) %>%
  mutate(scoring = fct_relevel(scoring, "D", "A")) %>%
  rma.mv(yi     = yi, 
         V      = vi, 
         random = ~ 1 | domain,
         mods   = ~ scoring,
         data   = .,
         slab   = domain)

fit_internal_consistency_D_vs_A_scores
## 
## Multivariate Meta-Analysis Model (k = 66; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed  factor 
## sigma^2    0.0276  0.1662     33     no  domain 
## 
## Test for Residual Heterogeneity:
## QE(df = 64) = 54.8498, p-val = 0.7856
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.5033, p-val = 0.4780
## 
## Model Results:
## 
##           estimate      se     zval    pval    ci.lb   ci.ub 
## intrcpt     0.7390  0.0636  11.6226  <.0001   0.6144  0.8636  *** 
## scoringA    0.0546  0.0769   0.7094  0.4780  -0.0962  0.2053      
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tibble(response_options = c("D", "A"),
       alpha = c(0.7453, 0.7453+0.0500),
       ci_lower = c(0.6201, 0.7453-0.1007),
       ci_upper = c(0.8705, 0.7453+0.2007)) %>%
  mutate_if(is.numeric, transf.iabt) %>%
  mutate_if(is.numeric, round, digits = 2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
response_options alpha ci_lower ci_upper
D 0.53 0.46 0.58
A 0.55 0.48 0.61

Use only one block order

Internal consistency using permutation approach sensitivity analysis, multilevel moderator meta analysis between block orders for the domains that have both.

fit_internal_consistency_block_order <- 
  data_D_scores_internal_consistency_permuted_estimates_by_block_order %>%
  filter(!str_detect(domain, "Sexuality")) %>%
  rma.mv(yi     = yi, 
         V      = vi, 
         mods   = ~ block_order,
         random = ~ 1 | domain,
         data   = .,
         slab   = paste(domain, block_order))

fit_internal_consistency_block_order
## 
## Multivariate Meta-Analysis Model (k = 36; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed  factor 
## sigma^2    0.0000  0.0000     18     no  domain 
## 
## Test for Residual Heterogeneity:
## QE(df = 34) = 15.1504, p-val = 0.9978
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.0577, p-val = 0.8102
## 
## Model Results:
## 
##                   estimate      se    zval    pval    ci.lb   ci.ub 
## intrcpt             0.6101  0.1063  5.7395  <.0001   0.4018  0.8184  *** 
## block_orderincon    0.0379  0.1580  0.2402  0.8102  -0.2717  0.3475      
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tibble(block_order = c("con first", "incon first"),
       alpha = c(0.6101, 0.6101+0.0379),
       ci_lower = c(0.4018, 0.6101-0.2717),
       ci_upper = c(0.8184,  0.6101+0.3475)) %>%
  mutate_if(is.numeric, transf.iabt) %>%
  mutate_if(is.numeric, round, digits = 2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
block_order alpha ci_lower ci_upper
con first 0.46 0.33 0.56
incon first 0.48 0.29 0.62

Use only either fixed or moving response options

Internal consistency using permutation approach sensitivity analysis, moderator meta analysis between response option locations between domains.

fit_internal_consistency_response_option_locations <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality") &
           !is.na(response_option_locations)) %>%
  rma(yi   = yi, 
      vi   = vi, 
      mods = ~ response_option_locations,
      data = .,
      slab = paste(domain, response_option_locations))

fit_internal_consistency_response_option_locations
## 
## Mixed-Effects Model (k = 33; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0 (SE = 0.0208)
## tau (square root of estimated tau^2 value):             0
## I^2 (residual heterogeneity / unaccounted variability): 0.00%
## H^2 (unaccounted variability / sampling variability):   1.00
## R^2 (amount of heterogeneity accounted for):            0.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 31) = 16.2264, p-val = 0.9865
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 5.3664, p-val = 0.0205
## 
## Model Results:
## 
##                                  estimate      se     zval    pval    ci.lb 
## intrcpt                            0.9483  0.1138   8.3352  <.0001   0.7254 
## response_option_locationsMoving   -0.3000  0.1295  -2.3166  0.0205  -0.5539 
##                                    ci.ub 
## intrcpt                           1.1713  *** 
## response_option_locationsMoving  -0.0462    * 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tibble(response_options = c("static", "moving"),
       alpha = c(0.9483, 0.9483-0.2959),
       ci_lower = c(0.7254, 0.9483-0.5500),
       ci_upper = c(1.1713, 0.9483-0.0418)) %>%
  mutate_if(is.numeric, transf.iabt) %>%
  mutate_if(is.numeric, round, digits = 2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
response_options alpha ci_lower ci_upper
static 0.61 0.52 0.69
moving 0.48 0.33 0.60

Session info

sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_IE.UTF-8/en_IE.UTF-8/en_IE.UTF-8/C/en_IE.UTF-8/en_IE.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] scales_1.1.1     ggExtra_0.9      psych_1.9.12.31  kableExtra_1.1.0
##  [5] knitr_1.28       metafor_2.4-0    Matrix_1.2-18    forcats_0.5.0   
##  [9] stringr_1.4.0    dplyr_0.8.5      purrr_0.3.4      readr_1.3.1     
## [13] tidyr_1.0.3      tibble_3.0.1     ggplot2_3.3.0    tidyverse_1.3.0 
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.4.6      lubridate_1.7.8   lattice_0.20-41   assertthat_0.2.1 
##  [5] digest_0.6.25     mime_0.9          R6_2.4.1          cellranger_1.1.0 
##  [9] backports_1.1.6   reprex_0.3.0      evaluate_0.14     highr_0.8        
## [13] httr_1.4.1        pillar_1.4.4      rlang_0.4.6       readxl_1.3.1     
## [17] rstudioapi_0.11   miniUI_0.1.1.1    rmarkdown_2.1     labeling_0.3     
## [21] webshot_0.5.2     munsell_0.5.0     shiny_1.4.0.2     broom_0.5.6      
## [25] httpuv_1.5.2      compiler_4.0.0    modelr_0.1.7      xfun_0.13        
## [29] pkgconfig_2.0.3   mnormt_1.5-7      htmltools_0.4.0   tidyselect_1.0.0 
## [33] fansi_0.4.1       viridisLite_0.3.0 later_1.0.0       crayon_1.3.4     
## [37] dbplyr_1.4.3      withr_2.2.0       grid_4.0.0        xtable_1.8-4     
## [41] nlme_3.1-147      jsonlite_1.6.1    gtable_0.3.0      lifecycle_0.2.0  
## [45] DBI_1.1.0         magrittr_1.5      cli_2.0.2         stringi_1.4.6    
## [49] farver_2.0.3      promises_1.1.0    fs_1.4.1          xml2_1.3.2       
## [53] ellipsis_0.3.0    generics_0.0.2    vctrs_0.2.4       tools_4.0.0      
## [57] glue_1.4.0        hms_0.5.3         fastmap_1.0.1     parallel_4.0.0   
## [61] yaml_2.2.1        colorspace_1.4-1  rvest_0.3.5       haven_2.2.0